{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "view-in-github"
   },
   "source": [
    "<a href=\"https://colab.research.google.com/github/CoreTheGreat/HBPU-Machine-Learning-Course/blob/main/ML_Chapter3_Classification.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "lPboLx_o0UxI"
   },
   "source": [
    "# 第五章：深度学习\n",
    "湖北理工学院《机器学习》课程资料\n",
    "\n",
    "作者：李辉楚吴\n",
    "\n",
    "笔记内容概述: 前馈神经网络、全连接网络、Wi-Fi动作感知"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 处理原始Mat文件，与本实验无关"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read U1_G1_N10_L_L1_D0_20200408_1_Labeled.mat file\n",
    "import scipy.io as sio\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os\n",
    "\n",
    "def mat2csi(matfile):\n",
    "    ''' \n",
    "    Change mat to csi\n",
    "    Extract csi of first T-R link\n",
    "    \n",
    "    return:\n",
    "    csi: CSI data of first T-R link\n",
    "    csilabel: CSI label of first T-R link\n",
    "    timestamp: CSI timestamp of first T-R link\n",
    "    '''\n",
    "    \n",
    "    # Load the .mat file\n",
    "    mat_data = sio.loadmat(matfile)\n",
    "    \n",
    "    # For example, if there's a key called 'data':\n",
    "    raw_timestamp = mat_data['csi'][:,0]\n",
    "    raw_csi = mat_data['csi'][:,2:32]\n",
    "    raw_csilabel = mat_data['csiLabel'].reshape(-1)\n",
    "\n",
    "    # Get indices of labels > 0\n",
    "    valid_indices = raw_csilabel >= 0\n",
    "    csi = np.abs(raw_csi[valid_indices]) # Take the absolute value of the CSI data\n",
    "    csilabel = raw_csilabel[valid_indices].astype(int) # Extract the labels\n",
    "    timestamp = raw_timestamp[valid_indices].real / 10 ** 6 # Convert the timestamp to seconds, using only the real part\n",
    "    timestamp = timestamp - timestamp[0] # Normalize the timestamp\n",
    "\n",
    "    # Change to DataFrame\n",
    "    df_combined = pd.DataFrame({\n",
    "        'timestamp': timestamp,\n",
    "        'label': csilabel,\n",
    "        **{f'Channel {i}': csi[:, i-1] for i in range(1, 31)}\n",
    "    })\n",
    "\n",
    "    # Extract filename without .mat extension\n",
    "    filename = os.path.basename(matfile).split('.')[0] + '.csv'\n",
    "    \n",
    "    # Save combined DataFrame to a single CSV file\n",
    "    df_combined.to_csv(filename, index=False)\n",
    "\n",
    "    print(f'{filename} saved successfully.')\n",
    "\n",
    "    return csi, csilabel, timestamp\n",
    "\n",
    "_, _, _ = mat2csi('./Data/U1_G1_N10_L_L1_D0_20200408_1_Labeled.mat')\n",
    "_, _, _ = mat2csi('./Data/U1_G1_N30_L_L1_D0_20200408_2_Labeled.mat')\n",
    "_, _, _ = mat2csi('./Data/U1_G2_N10_L_L1_D0_20200408_1_Labeled.mat')\n",
    "_, _, _ = mat2csi('./Data/U1_G2_N30_L_L1_D0_20200408_2_Labeled.mat')\n",
    "_, _, _ = mat2csi('./Data/U1_G3_N10_L_L1_D0_20200408_1_Labeled.mat')\n",
    "_, _, _ = mat2csi('./Data/U1_G3_N30_L_L1_D0_20200408_2_Labeled.mat')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据准备\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "载入csv数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Define training and testing files\n",
    "training_files = [\n",
    "    './Data/U1_G1_N30_L_L1_D0_20200408_2_Labeled.csv',\n",
    "    './Data/U1_G2_N30_L_L1_D0_20200408_2_Labeled.csv',\n",
    "    './Data/U1_G3_N30_L_L1_D0_20200408_2_Labeled.csv']\n",
    "\n",
    "testing_files = [\n",
    "    './Data/U1_G1_N10_L_L1_D0_20200408_1_Labeled.csv',\n",
    "    './Data/U1_G2_N10_L_L1_D0_20200408_1_Labeled.csv',\n",
    "    './Data/U1_G3_N10_L_L1_D0_20200408_1_Labeled.csv'\n",
    "]\n",
    "\n",
    "# Function to read and process CSV files\n",
    "def read_csv_file(file_path):\n",
    "    print(file_path)\n",
    "    df = pd.read_csv(file_path)\n",
    "    csi = df.iloc[:, 2:].values  # All columns except 'timestamp' and 'label'\n",
    "    label = df['label'].values # 0: static, 1: up, 2: down, 3: left, 4: right\n",
    "    timestamp = df['timestamp'].values\n",
    "    print(np.unique(label))\n",
    "    return csi, label, timestamp\n",
    "\n",
    "def segment_signals(csi, label, timestamp):\n",
    "    segments = [] # Store segments\n",
    "    segment_label = label[0] # Initialize segment label\n",
    "    segment_start = 0 # Initialize segment start index\n",
    "\n",
    "    for i in range(len(label)): # Iterate through all labels\n",
    "        if label[i] != segment_label: # If the label is different from the current segment label\n",
    "            segments.append((csi[segment_start:i-1], segment_label, timestamp[segment_start:i-1])) # Append the current segment to the segments list\n",
    "            segment_start = i # Update the segment start index\n",
    "            segment_label = label[i] # Update the segment label\n",
    "\n",
    "    segments.append((csi[segment_start:], segment_label, timestamp[segment_start:])) # Append the last segment to the segments list\n",
    "    return segments\n",
    "\n",
    "# Define training and testing segments\n",
    "training_segments = []\n",
    "testing_segments = []\n",
    "\n",
    "# Read and process training files\n",
    "for file in training_files:\n",
    "    s, y, t = read_csv_file(file)\n",
    "    training_segments.extend(segment_signals(s, y, t))\n",
    "\n",
    "# Read and process testing files\n",
    "for file in testing_files:\n",
    "    s, y, t = read_csv_file(file)\n",
    "    testing_segments.extend(segment_signals(s, y, t))\n",
    "\n",
    "# Print sizes of the training segments and testing segments\n",
    "print(f\"Training segments: {len(training_segments)}\")\n",
    "\n",
    "# Print length of all training segments\n",
    "for i, (s, y, t) in enumerate(training_segments):\n",
    "    print(f\"Training Segment {i + 1}: {len(s)}\")\n",
    "\n",
    "# Print size of the testing segments\n",
    "print(f\"Testing segments: {len(testing_segments)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "数据对齐：通过特征提取使得每一个训练集和测试集的样本长度相同"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from scipy.stats import kurtosis\n",
    "from scipy.stats import skew\n",
    "\n",
    "# Extract features of training segments\n",
    "def extract_features(s):\n",
    "    ''' \n",
    "    Extract features of each segment\n",
    "    features include:\n",
    "    - mean\n",
    "    - std\n",
    "    - max\n",
    "    - min\n",
    "    - median\n",
    "    - kurtosis\n",
    "    - skew\n",
    "    \n",
    "    Input:\n",
    "    s: segment (N*30) in training_segments or testing_segments\n",
    "    \n",
    "    Output:\n",
    "    x: 1-D vector (8*30)\n",
    "    '''\n",
    "    x = []\n",
    "    x.extend(np.mean(s, axis=0))\n",
    "    x.extend(np.std(s, axis=0))\n",
    "    x.extend(np.max(s, axis=0))\n",
    "    x.extend(np.min(s, axis=0))\n",
    "    x.extend(np.median(s, axis=0))\n",
    "    x.extend(kurtosis(s, axis=0))\n",
    "    x.extend(skew(s, axis=0))\n",
    "\n",
    "    return np.array(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用extract_features创建训练集和测试集\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch.utils.data import DataLoader, TensorDataset\n",
    "\n",
    "def one_hot_collate(batch):\n",
    "    data = torch.stack([item[0] for item in batch])\n",
    "    labels = torch.tensor([item[1] for item in batch])\n",
    "    \n",
    "    one_hot_labels = torch.zeros(labels.size(0), 4)  # 4 classes\n",
    "    one_hot_labels.scatter_(1, labels.unsqueeze(1), 1)\n",
    "    return data, one_hot_labels\n",
    "\n",
    "batch_size = 4\n",
    "\n",
    "# Build training dataset\n",
    "trX = [extract_features(s) for s, _, _ in training_segments] # Extract features of training segments\n",
    "trX = torch.tensor(trX, dtype=torch.float32) # Convert trX to tensor\n",
    "trY = [y for _, y, _ in training_segments] # Extract labels of training segments\n",
    "trY = torch.tensor(trY) # Convert trY to tensor\n",
    "\n",
    "# Build testing dataset\n",
    "teX = [extract_features(s) for s, _, _ in testing_segments] # Extract features of testing segments\n",
    "teX = torch.tensor(teX, dtype=torch.float32) # Convert teX to tensor\n",
    "teY = [y for _, y, _ in testing_segments] # Extract labels of testing segments\n",
    "teY = torch.tensor(teY) # Convert teY to tensor\n",
    "\n",
    "# Normalize trX and teX\n",
    "# Calculate mean and standard deviation from the training data\n",
    "mean = trX.mean(dim=0)\n",
    "std = trX.std(dim=0)\n",
    "\n",
    "# Normalize training data\n",
    "trX = (trX - mean) / std\n",
    "\n",
    "# Normalize testing data using training mean and std\n",
    "teX = (teX - mean) / std\n",
    "\n",
    "# Build Dataset\n",
    "trDataset = TensorDataset(trX, trY) # Create training dataset\n",
    "teDataset = TensorDataset(teX, teY) # Create testing dataset\n",
    "\n",
    "# Build loader\n",
    "trLoader = DataLoader(trDataset, batch_size=batch_size, shuffle=True, num_workers=0, collate_fn=one_hot_collate) # Create training dataloader\n",
    "teLoader = DataLoader(teDataset, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=one_hot_collate) # Create testing dataloader"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "定义模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch.nn as nn\n",
    "\n",
    "class FNN(nn.Module):\n",
    "    def __init__(self, input_size, hidden_size, num_classes):\n",
    "        super(FNN, self).__init__()\n",
    "        self.fc1 = nn.Linear(input_size, hidden_size)\n",
    "        self.relu1 = nn.ReLU()\n",
    "        self.fc2 = nn.Linear(hidden_size, hidden_size)\n",
    "        self.relu2 = nn.ReLU()\n",
    "        self.fc3 = nn.Linear(hidden_size, num_classes)\n",
    "        self.softmax = nn.Softmax(dim=1)\n",
    "    \n",
    "    def forward(self, x):\n",
    "        x = self.fc1(x)\n",
    "        x = self.relu1(x)\n",
    "        x = self.fc2(x)\n",
    "        x = self.relu2(x)\n",
    "        x = self.fc3(x)\n",
    "        out = self.softmax(x)\n",
    "        return out"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用Adam作为Optimizor训练模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the model parameters\n",
    "hidden_size = 10\n",
    "\n",
    "# Instantiate the model\n",
    "input_size = trX.shape[1]\n",
    "num_classes = 4 # 3 movements and static\n",
    "model = FNN(input_size, hidden_size, num_classes)\n",
    "print(model)\n",
    "\n",
    "# Define loss function and optimizer\n",
    "criterion = nn.CrossEntropyLoss()\n",
    "optimizer = torch.optim.Adam(model.parameters())\n",
    "\n",
    "# Lists to store losses\n",
    "train_losses = []\n",
    "te_losses = []\n",
    "\n",
    "# Number of epochs\n",
    "num_epochs = 200\n",
    "\n",
    "for epoch in range(num_epochs):\n",
    "    model.train()\n",
    "    batch_losses = []\n",
    "    \n",
    "    for batch_x, batch_y in trLoader:\n",
    "        # Forward pass\n",
    "        outputs = model(batch_x)\n",
    "        loss = criterion(outputs, batch_y)\n",
    "        \n",
    "        # Backward pass and optimize\n",
    "        optimizer.zero_grad()\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "        \n",
    "        batch_losses.append(loss.item())\n",
    "    \n",
    "    # Calculate average training loss for this epoch\n",
    "    avg_train_loss = sum(batch_losses) / len(batch_losses)\n",
    "    train_losses.append(avg_train_loss)\n",
    "    \n",
    "    # Evaluate on cross-validation set\n",
    "    model.eval()\n",
    "    te_batch_losses = []\n",
    "    with torch.no_grad():\n",
    "        for te_x, te_y in teLoader:\n",
    "            te_outputs = model(te_x)\n",
    "            te_loss = criterion(te_outputs, te_y)\n",
    "            te_batch_losses.append(te_loss.item())\n",
    "    \n",
    "    avg_te_loss = sum(te_batch_losses) / len(te_batch_losses)\n",
    "    te_losses.append(avg_te_loss)\n",
    "    \n",
    "    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, CV Loss: {avg_te_loss:.4f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "计算精度与学习曲线"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Calculate and print accuracies for training and cross-validation sets\n",
    "model.eval()\n",
    "with torch.no_grad():\n",
    "    # Training set accuracy\n",
    "    tr_correct = 0\n",
    "    tr_total = 0\n",
    "    for images, labels in trLoader:\n",
    "        outputs = model(images)\n",
    "        _, predicted = torch.max(outputs, 1)\n",
    "        _, true_labels = torch.max(labels, 1)\n",
    "        tr_total += labels.size(0)\n",
    "        tr_correct += (predicted == true_labels).sum().item()\n",
    "    \n",
    "    tr_accuracy = 100 * tr_correct / tr_total\n",
    "    \n",
    "    # test set accuracy\n",
    "    te_correct = 0\n",
    "    te_total = 0\n",
    "    for images, labels in teLoader:\n",
    "        outputs = model(images)\n",
    "        _, predicted = torch.max(outputs, 1)\n",
    "        _, true_labels = torch.max(labels, 1)\n",
    "        te_total += labels.size(0)\n",
    "        te_correct += (predicted == true_labels).sum().item()\n",
    "    \n",
    "    te_accuracy = 100 * te_correct / te_total\n",
    "\n",
    "print(f'Accuracy on training set: {tr_accuracy:.2f}%')\n",
    "print(f'Accuracy on cross-validation set: {te_accuracy:.2f}%')\n",
    "\n",
    "# Plot training and cross-validation losses\n",
    "plt.figure(figsize=(10, 5))\n",
    "plt.plot(range(1, num_epochs+1), train_losses, label='Training Loss')\n",
    "plt.plot(range(1, num_epochs+1), te_losses, label='Testing Loss')\n",
    "plt.xlabel('Epoch')\n",
    "plt.ylabel('Loss')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "authorship_tag": "ABX9TyO5gS9/MePw+FDiXJA07L6y",
   "include_colab_link": true,
   "provenance": []
  },
  "kernelspec": {
   "display_name": "machinelearning",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
